import numpy as np
import pandas as pd
import statsmodels.api as sm
from patsy import dmatrices
from statsmodels.formula.api import logit

# Toy data set: a binary outcome column "患病" (0/1) and a three-level
# categorical predictor "年龄组" with the levels 青年 / 中年 / 老年.
data = {
    "患病": [0, 1, 1, 0, 0, 1, 0, 1, 1, 1],
    "年龄组": [
        '青年', '中年', '老年', '青年', '中年',
        '老年', '青年', '中年', '老年', '青年',
    ],
}

df = pd.DataFrame(data)

# One-hot encode the categorical column. drop_first=True removes one level
# (here "中年", leaving the 年龄组_老年 and 年龄组_青年 columns used below) so
# that the dummies are not collinear with the intercept. dtype=float keeps the
# indicators numeric; recent pandas versions default to bool, which patsy
# would otherwise treat as a categorical factor.
df = pd.get_dummies(df, columns=["年龄组"], drop_first=True, dtype=float)

# Build the design matrices from the DataFrame. Formula terms are joined with
# "+" (a comma is not a term separator), and `data=df` is required so that the
# column names are resolved against the DataFrame rather than the calling
# namespace. patsy adds the intercept column automatically, which makes this
# equivalent to selecting the two dummy columns and calling sm.add_constant.
y, X = dmatrices(
    '患病 ~ 年龄组_老年 + 年龄组_青年',
    data=df,
    return_type='dataframe',
)
print(y, X)

# NOTE: in this sample every "老年" row has 患病 == 1, so the 年龄组_老年
# coefficient has no finite maximum-likelihood estimate. Depending on the
# statsmodels version, fit() may emit a perfect-separation warning or raise;
# use more (or less separable) data for a meaningful estimate.
model = sm.Logit(y, X).fit()

print(model.summary())
